/*************************************************
Program: Stata_04c_combine_census_bea.sas
Author: tf
Date: 05/11/20

input: 
firm_2007_withtypes.dta created in stata04a
firm_import.dta created in stata_03
bea data created in stata_02
imports by type create in imp_type/analyze_import_type_01.do

outputs: 
firmXcountry dataset: ./output/country_imp_aff.dta



Notes: 
Creates firmXcountry level dataset by merging together census and affiiliate data

**make firm X country dataset by merging together the firm import data and the bea data 
**make sample restrictions to firms that have either domestic or foreign M
**************************************************/


cd "xxxx"  /* PROJECT ROOT FOLDER */
global imp_type "data"
global output output
global data "data"


*1  Bring in the import by type data and collapse to the firm-country level
use "$imp_type/firm_imp_types_2007.dta", clear

 *a) Drop mineral imports/exports
  drop if substr(hs,1,2)=="27"

 *b) collapse to country level for merge
  collapse (sum) imp* rp_imp* value rp_value, by(firmid country) fast

 *c) Merge to bea country codes to collapse some of the super detailed countries
  rename country code_TD
    merge m:1 code_TD using "$data/country_data_unique.dta", keepusing(code_TD countrycode country_name iso3)
    drop _merge
    rename countrycode ctry
    drop if firmid==""
     
  *d) Collapse to BEA aggregated country names
    collapse (sum) imp* rp_imp* value rp_value, by(firmid ctry country_name)
    rename value imp_tot_type
    rename rp_value rp_imp_tot_type
    save $output/temp.dta, replace

*2. Merge to the firm-country data
 use "$data/country_imp_aff2007.dta", clear

 merge 1:1 firmid ctry country_name using $output/temp.dta
 drop if _merge==2  //  firms outside this sample
 drop _merge

 *browse firmid country_name emp_census emp_man man_aff imp_inputf imp_tot_type imp_value
 
 
label variable rp_imp_bothf "Firm reports input and production"
label variable rp_imp_nonef "Firm does not report input or production"
label variable rp_imp_inputf_np "Firm reports input but not production"
label variable rp_imp_prodf_nif "Firm reports production but not input"

*3.  Save dataset with imports by type
 save "$data/country_imp_aff2007_type_02.dta", replace
 
 
 * browse firmid imp_value imp_tot_type rp_imp_value rp_imp_tot_type
 
 *check the totals
 foreach vv in imp rp_imp {
	gen check_`vv'=`vv'_value-`vv'_tot_type
	summ check_`vv'
 }
 

